INFO0808: Visualisation de données - Accidents corporels de la circulation routière Années de 2005 à 2019

Baptiste Dylan, Gandossi Jean-Victor

25/03/2021

Présentation du jeu de données

Fichiers rempli par les forces de l’ordre intervenue sur l’accident.

CARACTERISTIQUES

LIEUX

VÉHICULES

USAGERS

library("ggplot2")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("lubridate")
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library("plotly")
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library("leaflet")
library("viridisLite")
library("broom")

accidents <- read.csv("clean_datasets/accidents.csv", sep=',', header = TRUE)
usagers <- read.csv("clean_datasets/usagers.csv", sep=',')
anhr <- setNames(data.frame(table(accidents$an, accidents$hr)), c("an", "hr", "count"))

Dates

d1 <- setNames(data.frame(table(as.Date(paste(accidents$an, accidents$mois, accidents$jour, sep='-')))),c("Date","Count"))
d1$day <- weekdays(as.Date(d1$Date))

d2 <- d1 %>%
  mutate(Mois=month(Date), Date = as.Date(paste(year(Date), month(Date), '01', sep = '-'))) %>%
  group_by(Date, Mois) %>%
  summarise(Count = sum(Count))
## `summarise()` has grouped output by 'Date'. You can override using the `.groups` argument.
d3 <- d1 %>%
  mutate(Date = month(Date)) %>%
  group_by(Date) %>%
  summarise(Count = sum(Count))

d4 <- d1 %>%
  mutate(Mois=month(Date), Day=day(Date), p=paste(month(Date), day(Date)), Date = as.Date(paste(year(Date), month(Date), day(Date), sep = '-'))) %>%
  group_by(Day, Mois, p, Date) %>%
  summarise(Count = sum(Count))
## `summarise()` has grouped output by 'Day', 'Mois', 'p'. You can override using the `.groups` argument.
d4 <- d4[ with(d4, order(Mois, Day)), ]

d5 <- setNames(data.frame(table(weekdays(as.Date(paste(accidents$an, accidents$mois, accidents$jour, sep='-'))), accidents$hr )),c("Day", "hr", "Count"))

plot_ly(d1, x=~day) %>%
    add_boxplot(y=~Count) %>%
    layout(title = "Nombre d'accident en fonction des jours de la semaines de 2005 à 2019",
           yaxis = list(title="Nombre d'accidents"),
           xaxis = list(title = "Jours de la semaine", categoryorder = "array", categoryarray = c("lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche")))
plot_ly(d1, x=~factor(day, levels=c("lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche"))) %>%
    add_boxplot(y=~Count, color=~factor(month.abb[month(Date)], levels = month.abb)) %>%
    layout(title = "Nombre d'accident en fonction des jours de la semaines de 2005 ? 2019", xaxis=list(title="Jours de la semaine"), yaxis=list(title="Nombre d'accidents"), boxmode = "group")
plot_ly(d2, x=~factor(month.abb[Mois], levels = month.abb)) %>%
    add_boxplot(y=~Count) %>%
    layout(title = "Nombre d'accident en fonction des mois de l'annee de 2005 ? 2019", xaxis = list(title="Mois de l'annee"), yaxis = list(title="Nombre d'accidents")) #par mois
plot_ly(d1, x=~factor(month.abb[month(Date)], levels = month.abb)) %>%
    add_boxplot(y=~Count, color=~factor(day , levels=c("lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche"))) %>%
    layout(title = "Nombre d'accident en fonction des mois de l'annee de 2005 à 2019",xaxis=list(title="Mois de l'annee"), yaxis=list(title="Nombre d'accidents"),boxmode = "group")
plot_ly(d1, x=~day(Date)) %>%
    add_boxplot(y=~Count) %>%
    layout(title = "Nombre d'accident en fonction du jour du mois de l'annee de 2005 à 2019", xaxis=list(title="Jours du mois", yaxis=list(title="Nombre d'accidents")))
plot_ly(d1, x=~week(Date)) %>%
    add_boxplot(y=~Count) %>%
    layout(title = "Nombre d'accident en fonction du numero de la semaine de 2005 à 2019", xaxis = list(title="Numero de la semaine"), yaxis = list(title="Nombre d'accidents")) #par semaine
plot_ly(d4, x=~p) %>%
    add_boxplot(y=~Count) %>%
    layout(xaxis = list(categoryorder = "array", categoryarray =d4$p)) %>%
    layout(title = "Nombre d'accident en fonction des jours de l'annee de 2005 à 2019", xaxis = list(title="Jour de l'ann?e"), yaxis = list(title="Nombre d'accidents")) #par jour
plot_ly(anhr, x=~hr, y=~count, color=~an, type='scatter', mode = 'lines') %>%
    layout(title = "Nombre d'accident en fonction de l'heure de l'annee de 2005 a 2019", xaxis = list(title="Heure"), yaxis = list(title="Nombre d'accident"))
plot_ly(anhr, x=~an, y=~count, color=~hr, type='scatter', mode = 'lines') %>%
    layout(title = "Nombre d'accident en fonction de l'heure de l'annee de 2005 a 2019",xaxis = list(title="Annee"), yaxis = list(title="Nombre d'accident"))
plot_ly(anhr, x=~hr, y=~an, z=~count, type="heatmap") %>%
    layout(title = "Nombre d'accident en fonction de l'heure et de l'annee de 2005 a 2019",xaxis = list(title="Heure"), yaxis = list(title="Annee"))
plot_ly(anhr, x=~hr, y=~an, z=~count, color=~an, type="scatter3d",  mode = 'lines', line = list(width = 30)) # osef de lui ?
plot_ly(anhr, x=~hr, y=~count, color=~an, type="bar")
d5 <- setNames(data.frame(table(weekdays(as.Date(paste(accidents$an, accidents$mois, accidents$jour, sep='-'))), accidents$hr )),c("Day", "hr", "Count"))

plot_ly(d5, x=~Day, y=~Count, color=~hr, type="bar") %>%
    layout(yaxis = list(title="Nombre d'accidents"),
           xaxis = list(title = "Jours de la semaine",
                        categoryorder = "array",
                        categoryarray = c("lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche")))
plot_ly(d5, x=~hr, y=~Count, color=~factor(Day , levels=c("lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche")), type="bar")
ggplot(accidents, aes(x=an)) + geom_bar() + ggtitle("Nombre d'accident par an") + xlab("Annee") + ylab("Nombre d'accident") + geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, colour = "white")

ggplot(accidents, aes(x=mois)) + geom_bar()+ggtitle("Nombre d'accident par mois")+ xlab("Mois")+ylab("Nombre d'accident")+ geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, colour = "white")

Description des places

Description des places

d8 <- setNames(data.frame(table(usagers[usagers$place != 1,]$place)), c('place', 'count'))
plot_ly(d8, x=~place) %>%
    add_bars(d8, y=~count, text=~paste0(formatC(100 * count/sum(count), format='f', digits = 2), "%\n\n", place), textposition = 'auto') %>%
    layout(xaxis=list(showticklabels = FALSE))
labeller.sexe <- function(variable, value){ return (list("1"="Homme", "2"="Femme")[value])}
d7 <- setNames(data.frame(table(usagers$an - usagers$an_nais, usagers$sexe, usagers$grav)), c("age", "sexe", "grav", "total"))

ggplotly(ggplot(d7) + aes(x=age, y=total, fill=factor(grav, levels=c(1,4,3,2))) + geom_bar(stat="identity") + scale_x_discrete(name="âge", breaks = seq(0, 100, 5)))
ggplotly(ggplot(d7) + aes(x=age, y=total, fill=factor(grav, levels=c(1,4,3,2))) + geom_bar(stat="identity") + facet_wrap(~sexe, labeller=labeller.sexe) + scale_x_discrete(name="âge", breaks = seq(0, 100, 5)))
d6 <- d1[,c("Date", "Count")]
d6$breaks <- as.Date(cut(as.Date(d1$Date, format = "%Y-%m-%d"), breaks = "33 days"), format = "%Y-%m-%d")
d6 <- d6 %>% group_by(breaks) %>% summarise(Count = sum(Count))
lois = data.frame(Date = c("2012-01-05", "2012-07-01", "2018-07-01"), text=c("avertisseurs de radars interdits", "éthylotest obligatoirs", "loi 80Km/h"))

l <- loess(Count ~ as.numeric(breaks), data=d6)
r <- setNames(augment(l), c("Count", ".se.fit", ".fitted"))
r$breaks <- d6$breaks

plot_ly(d6, x=~breaks) %>%
    add_lines(y=~Count, name='Total', opacity=0.25, line = list(color = '#553333')) %>%
    add_lines(y=~fitted(l), line=list(color='#07A4B5'), name="Loess Smoother", showlegend=TRUE) %>%
    add_ribbons(data=r, ymin = ~.fitted - 0.04 * .se.fit, ymax = ~.fitted + 0.04 * .se.fit, line = list(color = 'rgba(7, 164, 181, 0.05)'), fillcolor = 'rgba(7, 164, 181, 0.2)', name = "Standard Error") %>%
    add_segments(data=lois, x=~Date, xend=~Date, color=~text, colors=~colors, text=~text, y=min(d6$Count), yend=max(d6$Count))

Cartes

ggplot(accidents, aes(x=dep)) + geom_bar()+ggtitle("Nombre d'accidents par departement")+ xlab("Numero de departement")+ylab("Nombre d'accident")+ geom_text(aes(label = dep), vjust=.35, hjust = -.5,angle=90, stat = "count", colour = "black") + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())

Cartes Paris

map.opacity = 0.30
paris <- accidents[(accidents$dep == 75) & (accidents$an == 2019), ] # paris en 2019
leaflet(data = paris) %>% addCircleMarkers(~long, ~lat, color="red", radius= 2, stroke = FALSE, fillOpacity = 0.5) %>% addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity))

Cartes - Marne

marne <- accidents[(accidents$dep == 51) & (accidents$an == 2019), ]

pal <- colorFactor(palette = viridis(100), domain = factor(marne$lum))
leaflet(data = marne) %>% addCircleMarkers(~long, ~lat, color=~pal(lum), radius= 3, stroke = FALSE, fillOpacity = 1) %>% addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity)) %>% addLegend("bottomright", pal = pal, values = ~lum, opacity = 1)
pal <- colorFactor(palette = viridis(100), domain = factor(marne$hr))
leaflet(data = marne) %>% addCircleMarkers(~long, ~lat, color=~pal(hr), radius= 5, stroke = FALSE, fillOpacity = 0.75) %>% addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity)) %>% addLegend("bottomright", pal = pal, values = ~hr, opacity = 1) %>% setView(lng=median(marne$long, na.rm=TRUE), lat=median(marne$lat, na.rm=TRUE), zoom = 10)

Cartes - 02, 51, 93, 60, 77, 75

dep <- accidents[(accidents$dep %in% c("51","2","93", "60", "77", "75")) & (accidents$an > 2018), ]
pal <- colorFactor(palette = viridis(100), domain = factor(dep$hr))
leaflet(data = dep) %>% addCircleMarkers(~long, ~lat, color=~pal(hr), radius= 3, stroke = FALSE, fillOpacity = 0.5) %>% addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity)) %>% addLegend("bottomright", pal = pal, values = ~hr, opacity = 1) %>% setView(lng=median(dep$long, na.rm=TRUE), lat=median(dep$lat, na.rm=TRUE), zoom = 10)

Cartes - Bouches-du-Rhône

bchr <- accidents[(accidents$dep %in% c("13")) & (accidents$an > 2018), ]
pal <- colorFactor(palette = viridis(100), domain = factor(bchr$hr))
leaflet(data = bchr) %>%
    addCircleMarkers(~long, ~lat, color=~pal(hr), radius= 3, stroke = FALSE, fillOpacity = 0.5) %>%
    addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity)) %>% addLegend("bottomright", pal = pal, values = ~hr, opacity = 1) %>%
    setView(lng=median(bchr$long, na.rm=TRUE), lat=median(bchr$lat, na.rm=TRUE), zoom = 10)

Cartes - Reunion

outmer <- accidents[(accidents$dep == '974'), ] # reunion 

pal <- colorFactor(palette = magma(100, direction = -1), domain = factor(outmer$an))
leaflet(data = outmer) %>%
    addCircleMarkers(~long, ~lat, color=~pal(an), radius= 3, stroke = FALSE, fillOpacity = 5) %>%
    addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity)) %>%
    addLegend("bottomright", pal = pal, values =~an, opacity = 1)
head(read.csv("datasets/caracteristiques_2018.csv", sep=',', header = TRUE), 5)
##     Num_Acc an mois jour hrmn lum agg int atm col com                      adr
## 1 2.018e+11 18    1   24 1505   1   1   4   1   1   5   route des Ansereuilles
## 2 2.018e+11 18    2   12 1015   1   2   7   7   7  11 Place du général de Gaul
## 3 2.018e+11 18    3    4 1135   1   2   3   1   7 477           Rue  nationale
## 4 2.018e+11 18    5    5 1735   1   2   1   7   3  52      30 rue Jules Guesde
## 5 2.018e+11 18    6   26 1605   1   2   1   1   3 477       72 rue Victor Hugo
##   gps     lat   long dep
## 1   M 5055737 294992 590
## 2   M 5052936 293151 590
## 3   M 5051243 291714 590
## 4   M 5051974 289123 590
## 5   M 5051607 290605 590
head(read.csv("datasets/caracteristiques_2019.csv", sep=';', header = TRUE), 5)
##     Num_Acc jour mois   an hrmn lum dep   com agg int atm col           adr
## 1 2.019e+11   30   11 2019  130   4  93 93053   1   1   1   2  AUTOROUTE A3
## 2 2.019e+11   30   11 2019  250   3  93 93066   1   1   1   6  AUTOROUTE A1
## 3 2.019e+11   28   11 2019 1515   1  92 92036   1   1   1   4 AUTOROUTE A86
## 4 2.019e+11   30   11 2019 2020   5  94 94069   1   1   1   4            A4
## 5 2.019e+11   30   11 2019  400   3  94 94028   1   1   1   2       A86 INT
##          lat      long
## 1 48,8962100 2,4701200
## 2 48,9307000 2,3688000
## 3 48,9358718 2,3191744
## 4 48,8173295 2,4281502
## 5 48,7763620 2,4332540
leaflet(data = outmer) %>%
    addCircleMarkers(~long, ~-abs(lat), color=~pal(an), radius= 3, stroke = FALSE, fillOpacity = 5) %>%
    addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity)) %>%
    addLegend("bottomright", pal = pal, values =~an, opacity = 1)
table(sort(outmer$an))
## 
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 
##  740  778  782  777  720  807  782  725  625  658  662  538  541  621  779

France

d2019 <- accidents[(accidents$an == 2019), ]
leaflet(data = d2019) %>%
    addCircleMarkers(~long, ~lat, radius= 3, stroke = FALSE, fillOpacity = 0.5) %>%
    addProviderTiles("Stamen.Toner", options = list(opacity=map.opacity))